In [1]:
import networkx as nx
import math
import numpy as np
In [2]:
from IPython.display import Javascript
In [3]:
lastnode = 5000
In [4]:
datafile = open('/var/datasets/wdc/small-pld-arc')
G = nx.DiGraph()
for line in datafile:
ijstr = line.split('\t')
i=int(ijstr[0])
j=int(ijstr[1])
if i>lastnode:
break
if j>lastnode:
continue
G.add_edge(i,j)
datafile.close()
Gorig = G.copy()
In [5]:
indexfile = open('/var/datasets/wdc/small-pld-index')
index = {}
for line in indexfile:
namei = line.split('\t')
name=namei[0]
i=int(namei[1])
if i>lastnode:
break
index[i]=name
indexfile.close()
In [6]:
def cleanupgraph(G):
comp = nx.weakly_connected_components(G.copy())
for c in comp:
if len(c)<4:
G.remove_nodes_from(c)
In [7]:
def graphcleanup(G):
for (node, deg) in G.degree_iter():
if deg==0:
G.remove_node(node)
elif deg==1:
if G.degree((G.predecessors(node) + G.successors(node))[0]) == 1:
G.remove_node(node)
elif deg==2 and G.in_degree(node)==1:
if (G.predecessors(node) == G.successors(node)) and G.degree((G.predecessors(node) + G.successors(node))[0]) == 2:
G.remove_node(node)
In [8]:
cleanupgraph(G)
In [9]:
G.size()
Out[9]:
In [10]:
Gorig.number_of_nodes()
Out[10]:
In [11]:
Gorig.size()
Out[11]:
In [12]:
#from IPython.core.display import display_javascript
import json
from networkx.readwrite import json_graph
In [13]:
d = json_graph.node_link_data(G)
for node in d['nodes']:
node['name']=node['id']
node['value']=G.degree(node['id'])
if True:
node['group'] = node['id'] % 4
else:
if node['id']<10:
node['group']=0#node['id'] % 4
else:
node['group']=1#node['id'] % 4
d['adjacency'] = json_graph.adjacency_data(G)['adjacency']
json.dump(d, open('rwgraph.json','w'))
In [37]:
%%html
<div id="d3-example"></div>
<style>
.node {stroke: #fff; stroke-width: 1.5px;}
.link {stroke: #999; stroke-opacity: .3;}
</style>
<script src="randomwalk.js"></script>
In [17]:
Javascript(filename='force.js')
In [50]:
In [19]:
L = nx.linalg.laplacianmatrix.directed_laplacian_matrix(G)
Linv = np.linalg.inv(L)
In [20]:
L.shape
Out[20]:
In [21]:
n = L.shape[0]
Reff = np.zeros((n,n))
In [22]:
Gsparse = G.copy()
In [23]:
graphcleanup(Gsparse)
In [24]:
nodelookup={Gsparse.nodes()[idx]:idx for idx in range(len(Gsparse.nodes()))}
In [25]:
edge = np.zeros((n,1))
for (i,j) in Gsparse.edges_iter():
edge[nodelookup[i]] = 1
edge[nodelookup[j]] = -1
Reff[nodelookup[i],nodelookup[j]] = edge.T.dot(Linv.dot(edge))
edge[[nodelookup[i]]] = 0
edge[[nodelookup[j]]] = 0
In [26]:
ReffAbs=np.abs(Reff)+np.abs(Reff.T)
If you call
arr.argsort()[:3] It will give you the indices of the 3 smallest elements.
array([0, 2, 1], dtype=int64) So, for n, you should call
arr.argsort()[:n]
In [27]:
res = ReffAbs.reshape(n**2)
argp = np.argpartition(res,n**2-n)
In [28]:
mask = (ReffAbs < res[argp[-int(0.5*Gsparse.number_of_nodes())]]) & (ReffAbs >0)
for (i,j) in Gsparse.edges():
if mask[nodelookup[i],nodelookup[j]]:
Gsparse.remove_edge(i,j)
In [29]:
cleanupgraph(Gsparse)
In [30]:
d = json_graph.node_link_data(Gsparse)
for node in d['nodes']:
node['name']=index[node['id']]
node['value']=Gsparse.degree(node['id'])
node['group']=index[node['id']][-3:]
json.dump(d, open('graph.json','w'))
In [31]:
Gorig.number_of_edges()
Out[31]:
In [32]:
Gsparse.number_of_edges()
Out[32]:
In [33]:
Gsparse.number_of_nodes()
Out[33]:
In [ ]:
In [34]:
GsparseAdj = nx.linalg.adjacency_matrix(Gorig).toarray()
GsparseAdj = nx.to_numpy_matrix(Gorig)
GsparseAdj[ReffAbs < res[argp[-300]]] = 0
Gsparse = nx.from_numpy_matrix?
Gsparse = nx.from_numpy_matrix
Gsparse = nx.from_numpy_matrix(GsparseAdj, create_using=nx.DiGraph())
In [ ]:
Gsparse = nx.from_numpy_matrix
In [35]:
edge = np.zeros((n,1))
for i in range(n):
if i % int(math.ceil((float(10)/100)*n)) == 0:
print int(math.floor(100*float(i)/n)), '%'
edge[i] = 1
for j in range(i+1, n):
edge[j] = -1
Reff[i,j] = edge.T.dot(Linv.dot(edge))
edge[j] = 0
edge[i] = 0
In [ ]: